Structure fire is one of the most common yet destructive urban disasters. The Philadelphia Fire Department responds to hundreds or even thousands of locations everyday to quell an array of emergencies. On Nov. 21, 2020, two children were killed in a rowhome fire in Philadelphia’s Grays Ferry neighborhood despite rescue efforts by Philadelphia firefighters and desperate neighbors. The fire was reported at about 1:15 a.m. Although firefighters responded within two minutes, the row home was consumed by fire when they arrived.
usecase
It’s not yet known if the home had working smoke detectors, but the fire risk for each building is not the same. In a recent study, conducted by American Survey CO, for the period of 2005 - 2010, the causes of house fires across America were as follows:
Besides those immediate cause of fire, there are other factors that are important to fire risk prediction and worth fire fighter awareness. For instance, code violation and 311 request can be a vital factor for fire risk.
Currently, the Fire Department has litte ‘situational awareness’ of fire risk for a given location when an emergency call comes in. Therefore, we are going to help them by creating a parcel-level (building) fire risk score prediction for each property in the city, and by providing 2 deliverables:
API
api-interface
api-interface
api-interface
api-interface
In addition, we want to let residents get a real-time update of the fire risk of their houses so they will have a situational awareness on risk for each property citywide.
join
story
We are going to consider fire risk by gathering parcel level data (indexed by the ‘OPA_Account’ number, including Property Feature and Code Violation), environment factors (including 311 Request and Census Data) , and past fire events (Time-Spatial Lag) to create an API that can retrieve relevant data given an address query.
library(tidyverse)
library(sf)
library(QuantPsyc)
library(RSocrata)
library(viridis)
library(caret)
library(spatstat)
library(spdep)
library(FNN)
library(grid)
library(gridExtra)
library(knitr)
library(kableExtra)
library(tidycensus)
library(mapview)
library(httr)
library(dplyr)
library(readxl)
library(stringr)
library(raster)
library(ggplot2)
library(lubridate)
qBr <- function(df, variable, rnd) {
if (missing(rnd)) {
as.character(quantile(round(df[[variable]],0),
c(.01,.2,.4,.6,.8), na.rm=T))
} else if (rnd == FALSE | rnd == F) {
as.character(formatC(quantile(df[[variable]]), digits = 3),
c(.01,.2,.4,.6,.8), na.rm=T)
}
}
q5 <- function(variable) {as.factor(ntile(variable, 5))}
palette5 <- c("#25CB10", "#5AB60C", "#8FA108", "#C48C04", "#FA7800")
mapTheme <- function(base_size = 12) {
theme(
text = element_text( color = "black"),
plot.title = element_text(size = 14,colour = "black"),
plot.subtitle=element_text(face="italic"),
plot.caption=element_text(hjust=0),
axis.ticks = element_blank(),
panel.background = element_blank(),axis.title = element_blank(),
axis.text = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=2)
)
}
plotTheme <- function(base_size = 12) {
theme(
text = element_text( color = "black"),
plot.title = element_text(size = 14,colour = "black"),
plot.subtitle = element_text(face="italic"),
plot.caption = element_text(hjust=0),
axis.ticks = element_blank(),
panel.background = element_blank(),
panel.grid.major = element_line("grey80", size = 0.1),
panel.grid.minor = element_blank(),
panel.border = element_rect(colour = "black", fill=NA, size=2),
strip.background = element_rect(fill = "grey80", color = "white"),
strip.text = element_text(size=12),
axis.title = element_text(size=12),
axis.text = element_text(size=10),
plot.background = element_blank(),
legend.background = element_blank(),
legend.title = element_text(colour = "black", face = "italic"),
legend.text = element_text(colour = "black", face = "italic"),
strip.text.x = element_text(size = 14)
)
}
# count function
count_net <- function(data,fishnetGrid, name){
count<- data %>% #get count by fishnet
dplyr::select() %>%
mutate(count = 1) %>%
aggregate(., fishnetGrid, sum) %>%
mutate(count = ifelse(is.na(count), 0, count),uniqueID = rownames(.))%>%
plyr::rename(.,c('count' = name))
return (count)
}
# kernel density function
kernelDensity <- function(data, fishnetGrid, name){
count<- data %>% #get count by fishnet
dplyr::select() %>%
mutate(count = 1) %>%
aggregate(., fishnetGrid, sum) %>%
mutate(count = ifelse(is.na(count), 0, count),
uniqueID = rownames(.),
cvID = sample(round(nrow(fishnetGrid) / 24), size=nrow(fishnetGrid), replace = TRUE))
point_ppp <- as.ppp(st_coordinates(data), W = st_bbox(count))
KD <- spatstat::density.ppp(point_ppp, 1000)
KD_dataframe<-as.data.frame(KD) %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(fishnetGrid)) %>%
aggregate(.,fishnetGrid, mean)%>%plyr::rename(.,c('value' = name))
return(KD_dataframe)
}
#Nearest neighbor (NND) function
nn_function <- function(measureFrom,measureTo,k) {
measureFrom_Matrix <- as.matrix(measureFrom)
measureTo_Matrix <- as.matrix(measureTo)
nn <-
get.knnx(measureTo, measureFrom, k)$nn.dist
output <-
as.data.frame(nn) %>%
rownames_to_column(var = "thisPoint") %>%
gather(points, point_distance, V1:ncol(.)) %>%
arrange(as.numeric(thisPoint)) %>%
group_by(thisPoint) %>%
summarize(pointDistance = mean(point_distance)) %>%
arrange(as.numeric(thisPoint)) %>%
dplyr::select(-thisPoint) %>%
pull()
return(output)
}
palette2 <- c("#F6C729","#800080")
palette3 <- c("#800080","#F6C729")
#setwd("~/Desktop/City Planning/2021 spring/801")
#fire2019 <- read_csv("./data/fire_dataset.csv")
fire2019 <- read_excel("./data/fire.xlsx")
fire2020 <- read_excel("./data/fire2020.xlsx")
fire2020 <- fire2020%>%mutate(addr_type = 1)
fire2019 <- subset(fire2019, select = -c(city,state,descript_b))
fire2020 <- subset(fire2020, select = -c(Propuse_desc,Code,Act_desc,Census,Occup_id))
fire2019 <-
fire2019 %>%
mutate(date=dmy(alm_date),
Year=year(date),
Month=month(date))
fire2020 <-
fire2020 %>%
mutate(date=as.Date(alm_date),
Year=year(date),
Month=month(date))
fire <- rbind(fire2019, fire2020)
#setdiff(names(fire2020),names(fire2019))
#setdiff(names(fire2019),names(fire2020))
property <- read_csv("./data/opa_properties_public.csv")
parcel <- read_csv("./data/DOR_Parcel.csv")
#opa <- read_csv("./data/Fire_OPA_Parcel.csv")
opa <- read_csv("./data/all_opa_par_addr.csv")
#opa[opa$Freq > 1,]
violation <- read_csv("./data/violations.csv")
#join fire with opa num
fire1 = fire %>%
filter(addr_type ==1)
fire1$address <- paste(ifelse(is.na(fire1$number)==FALSE,fire1$number,''),
"%20",
ifelse(is.na(fire1$st_prefix)==FALSE,fire1$st_prefix,''),
"%20",
ifelse(is.na(fire1$street)==FALSE,fire1$street,''),
"%20",
ifelse(is.na(fire1$st_type)==FALSE,fire1$st_type,''), sep = "")
fire2 = fire %>%
filter(addr_type ==2)
fire2$address <- paste(ifelse(is.na(fire2$xst_prefix)==FALSE,fire2$xst_prefix,''),
ifelse(fire2$xst_prefix!='',"%20",''),
ifelse(is.na(fire2$xstreet)==FALSE,fire2$xstreet,''),
"%20",
ifelse(is.na(fire2$xst_type)==FALSE,fire2$xst_type,''),
"%20",
"&",
"%20",
ifelse(is.na(fire2$st_prefix)==FALSE,fire2$st_prefix,''),
ifelse(fire2$st_prefix!='',"%20",''),
ifelse(is.na(fire2$street)==FALSE,fire2$street,''),
"%20",
ifelse(is.na(fire2$st_type)==FALSE,fire2$st_type,''),
sep = "")
fireData <- rbind(fire1, fire2)
#fireData$MUSA_ID <- paste0("MUSA_",1:nrow(fireData))
fire_opa <- left_join(fireData,opa,by='address')
#join property data with fire data
property <- rename(property, OPA_Num = parcel_number)
property_fire <- left_join(property,fire_opa,by='OPA_Num')
parcel <- rename(parcel, registry_number = BASEREG)
parcel_pro_fire <- left_join(parcel,property_fire,by='registry_number')
parcel_pro_fire = parcel_pro_fire %>%filter(ADDR_STD==location)
sort(names(parcel_pro_fire))
limit <-
st_read("./data/City_Limits-shp/city_limits.shp") %>%
st_transform(2272)
#ggplot() +
# geom_sf(data = limit)
tracts <-
st_read("http://data.phl.opendata.arcgis.com/datasets/ccdc0d022f4949a7b11333bd37231aef_0.geojson")%>%
st_set_crs(4326) %>%
na.omit() %>%
st_transform(crs=2272)
neighborhoods <-
st_read("https://raw.githubusercontent.com/azavea/geo-data/master/Neighborhoods_Philadelphia/Neighborhoods_Philadelphia.geojson")%>%
st_set_crs(4326) %>%
na.omit() %>%
st_transform(crs=2272)
dor <-
st_read("./data/DOR_Parcel/DOR_Parcel.shp")
# load the fire data
coords = st_centroid(dor)
parcel.sf <-
st_as_sf(coords, crs = 4326) %>%
st_transform(2272)
parcel.sf <- rename(parcel.sf, registry_number = BASEREG)
parcel.sf2 <- left_join(parcel.sf,property_fire,by='registry_number')
parcel.sf2 = parcel.sf2 %>%filter(ADDR_STD==location)
#setdiff(names(parcel.sf2),names(parcel_pro_fire))
#setdiff(names(parcel_pro_fire),names(parcel.sf2))
#parcel.sf2 <- subset(parcel.sf2, select = -c(ADDR_SOURC,SEPARATED_,MUNIMENT_T,MUNIMENT_I,Shape__Are,Shape__Len))
sort(names(parcel.sf2))
#parcel.sf2[parcel.sf2$Freq > 1,]
#parcel.sf2[parcel.sf2$id %in% parcel.sf2$Var1[parcel.sf2$Freq > 1],]
parcel.sf2 <- dplyr::distinct(parcel.sf2, .keep_all = TRUE)
parcel.sf2 <- mutate(parcel.sf2, isfire = case_when(
is.na(inci_no) == FALSE ~ "Have fire",
is.na(inci_no) == TRUE ~ "No fire"))
The first risk factor is the spatial correlation of fire incidents. To understand if fires have a tendency to cluster in Philadelphia, we examined the average distance to 5 nearest fire event for each parcel. The results indicate that buildings with a smaller distance to past fires have a higher risk for future fire event.
#print(as.Date('1/1/2020'))
#print(as.Date('01-Jan-15'))
#print(ymd('01-Jan-15'))
#print(ymd('1/1/2020'))
#unique(fire.sf$Year)
sort(table(parcel.sf2$Year),decreasing=TRUE)
##
## 2018 2015 2019 2016 2017 2020
## 3036 2740 2670 2459 2249 745
firebyyear<-subset(parcel.sf2, !is.na(Year))
ggplot(firebyyear, aes(x=factor(Year))) +
geom_bar(aes(fill=factor(Year)),stat="count") +
scale_fill_viridis(discrete=TRUE, option='inferno')+
labs(title = "Fire by Year", x = "Fire Year", y = "Count") +
plotTheme()
#unique(fire.sf$descript)
sort(table(firebyyear$descript),decreasing=TRUE)
##
## Cooking fire, confined to container
## 3852
## Building Fire, No Collapse
## 3329
## Building fire
## 1803
## Outside rubbish fire, Other
## 1061
## Outside rubbish, trash or waste fire
## 877
## Passenger vehicle fire
## 738
## Trash or rubbish fire, contained
## 428
## Mobile property (vehicle) fire, Other
## 391
## Brush or brush-and-grass mixture fire
## 172
## Special outside fire, Other
## 151
## Fires in structure other than in a building
## 150
## Fuel burner/boiler malfunction, fire confined
## 145
## Oil Burner/Heater, fire confined
## 138
## Natural vegetation fire, Other
## 121
## Dumpster or other outside trash receptacle fire
## 105
## Fire Involving Civilian Only
## 56
## Building fire, Interior Collapse, after PFD arriv
## 51
## Chimney or flue fire, confined to chimney or flue
## 50
## Grass fire
## 42
## Outside equipment fire
## 40
## Fire, Other
## 32
## Pot of Meat
## 24
## Building Fire, Interior Collapse, before PFD arriv
## 22
## Building fire, Exterior Collapse, before PFD arriv
## 19
## Building fire, Exterior Collapse, after PFD arriv
## 18
## Forest, woods or wildland fire
## 18
## Road freight or transport vehicle fire
## 16
## Outside storage fire
## 9
## Building Fire in a School
## 6
## Cultivated trees or nursery stock fire
## 4
## Off-road vehicle or heavy equipment fire
## 4
## Tarpot Fire
## 4
## Cultivated vegetation, crop fire, Other
## 3
## Incinerator overload or malfunction, fire confined
## 3
## Outside gas or vapor combustion explosion
## 3
## Commercial Compactor fire, confined to rubbish
## 2
## Construction or demolition landfill fire
## 2
## Fire in mobile prop used as a fixed struc, Other
## 2
## Outside stationary compactor/compacted trash fire
## 2
## Water vehicle fire
## 2
## Building fire, Ext Coll, bef PFD arriv, unatt bear
## 1
## Camper or recreational vehicle (RV) fire
## 1
## Fire in portable building, fixed location
## 1
## Garbage dump or sanitary landfill fire
## 1
firep <- firebyyear %>% filter(firebyyear$descript == 'Cooking fire, confined to container' |firebyyear$descript == 'Building Fire, No Collapse' |firebyyear$descript =='Building fire' |firebyyear$descript =='Outside rubbish fire, Other' |firebyyear$descript =='Outside rubbish, trash or waste fire' |firebyyear$descript =='Passenger vehicle fire' |firebyyear$descript =='Trash or rubbish fire, contained' |firebyyear$descript =='Mobile property (vehicle) fire, Other' |firebyyear$descript =='Brush or brush-and-grass mixture fire' |firebyyear$descript =='Special outside fire, Other')
firep <- within(firep,
descript <- factor(descript,
levels=names(sort(table(descript),
decreasing=TRUE))))
ggplot(firep, aes(x=factor(descript))) +
geom_bar(aes(fill=factor(descript)),stat="count") +
scale_fill_viridis(discrete=TRUE, option='inferno', direction = -1)+
labs(title = "Fire by Type", x = "Fire Type", y = "Count") +
theme(axis.text.x=element_blank())+
plotTheme()
fire.sf <- firebyyear
fire.sf_20 <- fire.sf[fire.sf$Year==2020,]
fire.sf_202 <- fire.sf_20[!is.na(fire.sf_20$Year),]
fire.sf_19 <- fire.sf[fire.sf$Year==2019,]
fire.sf_192 <- fire.sf_19[!is.na(fire.sf_19$Year),]
#fire.sf_192[fire.sf_192$Freq > 1,]
fire.sf_18 <- fire.sf[fire.sf$Year==2018,]
fire.sf_182 <- fire.sf_18[!is.na(fire.sf_18$Year),]
fire.sf_17 <- fire.sf[fire.sf$Year==2017,]
fire.sf_172 <- fire.sf_18[!is.na(fire.sf_17$Year),]
fire.sf_16 <- fire.sf[fire.sf$Year==2016,]
fire.sf_162 <- fire.sf_18[!is.na(fire.sf_16$Year),]
fire.sf_15 <- fire.sf[fire.sf$Year==2015,]
fire.sf_152 <- fire.sf_18[!is.na(fire.sf_15$Year),]
# the distance from the nearest 5 fires to the center of the grid for each year(2015-2020)
parcel.sf2$lagfire20.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(fire.sf_202),5)
parcel.sf2$lagfire19.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(fire.sf_192),5)
parcel.sf2$lagfire18.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(fire.sf_182),5)
parcel.sf2$lagfire17.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(fire.sf_172),5)
parcel.sf2$lagfire16.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(fire.sf_162),5)
parcel.sf2$lagfire15.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(fire.sf_152),5)
nn19 <- parcel.sf2 %>%
ggplot(aes(isfire, lagfire19.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean") +
scale_fill_manual(values = palette2) +
labs(x="isfire", y="Value",
title = "Distance to past fires (nn5), 2019") +
theme(legend.position = "none")
nn20 <- parcel.sf2 %>%
ggplot(aes(isfire, lagfire20.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean") +
scale_fill_manual(values = palette2) +
labs(x="isfire", y="Value",
title = "Distance to past fires (nn5), 2020") +
theme(legend.position = "none")
grid.arrange(nn19,nn20, nrow = 1)
parcel.sf2.sample <- parcel.sf2[sample(nrow(parcel.sf2), 30000), ]
nn192 <- ggplot()+
geom_sf(data = limit%>%st_transform(4326), fill = "grey", color = NA) +
geom_point(data=parcel.sf2.sample, aes(x=lat,y=lng,color = lagfire19.nn5), size=0.2) +
scale_color_viridis(option='inferno', direction = -1) +
labs(subtitle = "Distance to past fires (nn5), 2019") +
mapTheme()
nn202 <- ggplot()+
geom_sf(data = limit%>%st_transform(4326), fill = "grey", color = NA) +
geom_point(data=parcel.sf2.sample, aes(x=lat,y=lng,color = lagfire20.nn5), size=0.2) +
scale_color_viridis(option='inferno', direction = -1) +
labs(subtitle = "Distance to past fires (nn5), 2020") +
mapTheme()
grid.arrange(nn192,nn202, nrow = 1)
fire_neighborhoods <-
fire.sf_192 %>%
dplyr::select() %>%
mutate(count = 1) %>%
aggregate(., neighborhoods, sum) %>%
mutate(count = ifelse(is.na(count), 0, count),
uniqueID = rownames(.),
cvID = sample(round(nrow(neighborhoods) / 24), size=nrow(neighborhoods), replace = TRUE))
fire_neighborhoods2 <-
fire.sf_202 %>%
dplyr::select() %>%
mutate(count = 1) %>%
aggregate(., neighborhoods, sum) %>%
mutate(count = ifelse(is.na(count), 0, count),
uniqueID = rownames(.),
cvID = sample(round(nrow(neighborhoods) / 24), size=nrow(neighborhoods), replace = TRUE))
hood19 <- ggplot() +
geom_sf(data = fire_neighborhoods, aes(fill = count)) +
scale_fill_viridis(option='inferno') +
labs(title = "Fire by Neighborhood, 2019") +
mapTheme()
hood20 <- ggplot() +
geom_sf(data = fire_neighborhoods2, aes(fill = count)) +
scale_fill_viridis(option='inferno') +
labs(title = "Fire by Neighborhood, 2020") +
mapTheme()
grid.arrange(hood19,hood20, nrow = 1)
fire_tracts <-
fire.sf_192 %>%
dplyr::select() %>%
mutate(count = 1) %>%
aggregate(., tracts, sum) %>%
mutate(count = ifelse(is.na(count), 0, count),
uniqueID = rownames(.),
cvID = sample(round(nrow(tracts) / 24), size=nrow(tracts), replace = TRUE))
fire_tracts2 <-
fire.sf_202 %>%
dplyr::select() %>%
mutate(count = 1) %>%
aggregate(., tracts, sum) %>%
mutate(count = ifelse(is.na(count), 0, count),
uniqueID = rownames(.),
cvID = sample(round(nrow(tracts) / 24), size=nrow(tracts), replace = TRUE))
tract19 <- ggplot() +
geom_sf(data = fire_tracts, aes(fill = count)) +
scale_fill_viridis(option='inferno') +
labs(title = "Fire by Tract, 2019") +
mapTheme()
tract20 <- ggplot() +
geom_sf(data = fire_tracts2, aes(fill = count)) +
scale_fill_viridis(option='inferno') +
labs(title = "Fire by Tract, 2020") +
mapTheme()
grid.arrange(tract19,tract20, nrow = 1)
census <- get_acs(geography = "tract",
variables=c("B01003_001", "B19013_001",
"B02001_002", "B01002_001","B06009_005"),
key="d72b594e4d0f9b9b34217cdea8a4bcbc60354e21",
state=42,
geometry=TRUE,
county=101,
output="wide")
census1 <- census%>%
rename(Total_Pop=B01003_001E,
Med_Inc=B19013_001E,
Med_Age=B01002_001E,
White_Pop=B02001_002E,
Bachelor_Pop=B06009_005E) %>%
dplyr::select(Total_Pop, Med_Inc, White_Pop,Bachelor_Pop, Med_Age,geometry) %>%
mutate(Percent_White = White_Pop / Total_Pop,Percent_Bachelor=Bachelor_Pop/ Total_Pop)%>%
dplyr::select(- White_Pop,-Bachelor_Pop)
#write_csv(census1,'census1')
parcel.sf2 <-
st_join(parcel.sf2,census1%>% st_transform(2272),st_covered_by,left=TRUE)
cen1 <- ggplot() +
geom_sf(data = census1, aes(fill = Med_Inc)) +
scale_fill_viridis(option='inferno') +
labs(title = "Median Income, 2019") +
mapTheme()
cen2 <- ggplot() +
geom_sf(data = census1, aes(fill = Med_Age)) +
scale_fill_viridis(option='inferno') +
labs(title = "Median Age, 2019") +
mapTheme()
cen3 <- ggplot() +
geom_sf(data = census1, aes(fill = Total_Pop)) +
scale_fill_viridis(option='inferno') +
labs(title = "Total Population, 2019") +
mapTheme()
cen4 <- ggplot() +
geom_sf(data = census1, aes(fill = Percent_White)) +
scale_fill_viridis(option='inferno') +
labs(title = "Percentage White, 2019") +
mapTheme()
grid.arrange(cen1,cen2,cen3,cen4, nrow = 2)
We looked into all the properties in Philadelphia to examine whether there is a higher percentage of fire occurrence in the properties with certain features.
parcel.sf2 <-mutate(parcel.sf2, category = case_when(
category_code == 1 ~ "Residential",
category_code == 2 ~ "Hotels and Apartments",
category_code == 3 ~ "Store with Dwelling",
category_code == 4 ~ "Commercial",
category_code == 5 ~ "Industrial",
category_code == 6 ~ "Vacant Land"))
parcel.sf2 <-mutate(parcel.sf2, interior = case_when(
interior_condition == 0 ~ "Not Applicable",
interior_condition == 2 ~ "New/Rehabbed",
interior_condition == 3 ~ "Above Average",
interior_condition == 4 ~ "Average",
interior_condition == 5 ~ "Below Average",
interior_condition == 6 ~ "Vacant",
interior_condition == 7 ~ "Sealed / Structurally Compromised"))
parcel.sf2$year_built <- as.numeric(as.character(parcel.sf2$year_built))
parcel.sf2 <-mutate(parcel.sf2, year_built_cat = case_when(
year_built >= 1600 & year_built < 1800 ~ "1600-1800",
year_built >= 1800 & year_built < 1900 ~ "1800s",
year_built >= 1900 & year_built < 2000 ~ "1900s",
year_built >= 2000 & year_built < 2010 ~ "2000-2010",
year_built >= 2010 & year_built < 2020 ~ "2010-2020",
year_built < 1000 | year_built >2020 ~ "Unknown"))
###EDA plot
library(gridExtra)
correlations <- read_csv("./data/statistics2.csv")
histcom<-ggplot(correlations, aes(x = HasFire, y = Commercial,fill= HasFire)) +
geom_bar(stat='identity', width =0.6) +
scale_fill_manual(values = palette2) +
theme_minimal() +
ylim(0,10) +
labs(y = "% Commercial",
title = 'Parcels with Commercial Properties') +
theme(axis.title.x=element_blank()) +
theme(plot.title = element_text(hjust = 0.5, size = 11))+
theme(axis.title.y = element_text(color = "grey20", size = 8))+
theme(legend.key.size = unit(0.5, 'cm'),legend.title = element_text(color = "grey20",size=9),legend.text = element_text(color = "grey20",size=8))
#histcom
histhot<-ggplot(correlations, aes(x = HasFire, y = Hotels_Apartments, fill= HasFire)) +
geom_bar(stat='identity', width =0.6) +
scale_fill_manual(values = palette2) +
theme_minimal() +
ylim(0,30) +
labs(y = "% Hotels & Apartments",
title = 'Parcels with Hotels & Apartments') +
theme(axis.title.x=element_blank()) +
theme(plot.title = element_text(hjust = 0.5, size = 11))+
theme(axis.title.y = element_text(color = "grey20", size = 8))+
theme(legend.key.size = unit(0.5, 'cm'),legend.title = element_text(color = "grey20",size=9),legend.text = element_text(color = "grey20",size=8))
#histhot
histrm1<-ggplot(correlations, aes(x = HasFire, y = rm1, fill= HasFire)) +
geom_bar(stat='identity', width =0.6) +
scale_fill_manual(values = palette2) +
theme_minimal() +
ylim(0,25) +
labs(y = "% RM1 Zoning Type",
title = 'Parcels with RM1 Zoning Type') +
theme(axis.title.x=element_blank()) +
theme(plot.title = element_text(hjust = 0.5, size = 11))+
theme(axis.title.y = element_text(color = "grey20", size = 8))+
theme(legend.key.size = unit(0.5, 'cm'),legend.title = element_text(color = "grey20",size=9),legend.text = element_text(color = "grey20",size=8))
#histrm1
histcmx2<-ggplot(correlations, aes(x = HasFire, y = cmx2, fill= HasFire)) +
geom_bar(stat='identity', width =0.6) +
scale_fill_manual(values = palette2) +
theme_minimal() +
ylim(0,10) +
labs(y = "% CMX2 Zoning Type",
title = 'Parcels with CMX2 Zoning Type') +
theme(axis.title.x=element_blank()) +
theme(plot.title = element_text(hjust = 0.5, size = 11))+
theme(axis.title.y = element_text(color = "grey20", size = 8))+
theme(legend.key.size = unit(0.5, 'cm'),legend.title = element_text(color = "grey20",size=9),legend.text = element_text(color = "grey20",size=8))
#histcmx2
histsea<-ggplot(correlations, aes(x = HasFire, y = sealed, fill= HasFire)) +
geom_bar(stat='identity', width =0.6) +
scale_fill_manual(values = palette2) +
theme_minimal() +
ylim(0,5) +
labs(y = "% Structurally Compromised",
title = 'Parcels with Structurally Compromised Properties') +
theme(axis.title.x=element_blank()) +
theme(plot.title = element_text(hjust = 0, size = 11))+
theme(axis.title.y = element_text(color = "grey20", size = 8))+
theme(legend.key.size = unit(0.5, 'cm'),legend.title = element_text(color = "grey20",size=9),legend.text = element_text(color = "grey20",size=8))
#histsea
histbel<-ggplot(correlations, aes(x = HasFire, y = `below average`, fill= HasFire)) +
geom_bar(stat='identity', width =0.6) +
scale_fill_manual(values = palette2) +
theme_minimal() +
ylim(0,5) +
labs(y = "% Below Average",
title = 'Parcels with Properties which are below average') +
theme(axis.title.x=element_blank()) +
theme(plot.title = element_text(hjust = 0, size = 11))+
theme(axis.title.y = element_text(color = "grey20", size = 8))+
theme(legend.key.size = unit(0.5, 'cm'),legend.title = element_text(color = "grey20",size=9),legend.text = element_text(color = "grey20",size=8))
#histbel
grid.arrange(histcom, histhot, histrm1, histcmx2, histsea, histbel, nrow = 3)
As to property category, there is a higher percentage of fire occurrence in commercial, hotels and apartments properties. As to different zoning type. There is significantly higher propertion of fire occurred in the buildings of SPENT category. Slightly more fire occurred in the properties which are sealed or structurally compromised. There is no significant difference for fire occurrence in properties with different age.
#we only use 2019 violation data
violation$year <- year(violation$violationdate)
violation = violation %>%filter(year == 2019) %>%filter(is.na(opa_account_num)==FALSE)
#drop duplication
violation_distinct <- violation %>%
distinct(opa_account_num,violationcodetitle, .keep_all = TRUE)
#check duplication
#violation_group<- violation_distinct %>%group_by(opa_account_num)
#violation_cnt<- summarise(violation_group,count = n())
violation_distinct <-mutate(violation_distinct, violation_summary = case_when(
str_detect(violationcodetitle, "UNSAFE STRUCTURE") == TRUE | str_detect(violationcodetitle, "UNSAFE CONDITIONS") == TRUE | str_detect(violationcodetitle, "INTERIOR UNSAFE") == TRUE | str_detect(violationcodetitle, "VACANT PROP UNSAFE") == TRUE ~ "UNSAFE STRUCTURE",
str_detect(violationcode, "FC-13") == TRUE | str_detect(violationcode, "FC-907.3") == TRUE |
violationcodetitle == "ELECTRICAL -FIRE DAMAGED"~ "PROBLEMS WITH FIRE EQUIPTEMENT",
TRUE ~ "OTHERS"))
violation_distinct <- rename(violation_distinct, OPA_Num = opa_account_num)
violation_unsafe = violation_distinct %>%filter(violation_summary=="UNSAFE STRUCTURE")
violation_equip = violation_distinct %>%filter(violation_summary=="PROBLEMS WITH FIRE EQUIPTEMENT")
#drop duplication again
violation_unsafe <- violation_unsafe %>%
distinct(OPA_Num,violation_summary, .keep_all = TRUE)
violation_equip <- violation_unsafe %>%
distinct(OPA_Num,violation_summary, .keep_all = TRUE)
violation_unsafe_sub <- subset(violation_unsafe,select=c(violation_summary,OPA_Num))
violation_unsafe_sub <- rename(violation_unsafe_sub, vio_unsafe = violation_summary)
violation_equip_sub <- subset(violation_equip,select=c(violation_summary,OPA_Num))
violation_equip_sub <- rename(violation_equip_sub, vio_equip = violation_summary)
parcel.sf2 <- left_join(parcel.sf2,violation_unsafe_sub,by='OPA_Num')
parcel.sf2 <- left_join(parcel.sf2,violation_equip_sub,by='OPA_Num')
parcel.sf2 <-mutate(parcel.sf2, isunsafe = case_when(
is.na(vio_unsafe) == FALSE ~ "Is safe",
is.na(vio_unsafe) == TRUE ~ "Is unsafe"))
parcel.sf2 <-mutate(parcel.sf2, equip_pro = case_when(
is.na(vio_equip) == FALSE ~ "no problem",
is.na(vio_equip) == TRUE ~ "has problem"))
#violation plot
histuns<-ggplot(correlations, aes(x = HasFire, y = `unsafe structure`, fill= HasFire)) +
geom_bar(stat='identity', width =0.6) +
scale_fill_manual(values = palette2) +
theme_minimal() +
ylim(0,5) +
labs(y = "% Unsafe Structure",
title = 'Parcels with Properties which \n have unsafe structure') +
theme(axis.title.x=element_blank()) +
theme(plot.title = element_text(hjust = 0.5, size = 13))+
theme(axis.title.y = element_text(color = "grey20", size = 10))+
theme(legend.key.size = unit(0.5, 'cm'),legend.title = element_text(color = "grey20",size=9),legend.text = element_text(color = "grey20",size=10))
#histuns
histequ<-ggplot(correlations, aes(x =HasFire, y = `fire equriment`, fill= HasFire)) +
geom_bar(stat='identity', width =0.6) +
scale_fill_manual(values = palette2) +
theme_minimal() +
ylim(0,15) +
labs(y = "% Have Problems with Fire Equipment",
title = 'Parcels with Properties which have \n Problems with Fire Equipment') +
theme(axis.title.x=element_blank()) +
theme(plot.title = element_text(hjust = 0.5, size = 13))+
theme(axis.title.y = element_text(color = "grey20", size = 10))+
theme(legend.key.size = unit(0.5, 'cm'),legend.title = element_text(color = "grey20",size=9),legend.text = element_text(color = "grey20",size=10))
#histequ
grid.arrange(histuns,histequ, nrow = 1)
request <- read_csv("./data/public_cases_fc2019.csv") %>%
drop_na(lat)
request.sf <- request %>%
st_as_sf(coords = c("lon", "lat"), crs = 4326) %>%
st_transform(2272)
parcel.sf2$request19.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(request.sf),5)
request.sf2 <- request.sf %>% filter(request.sf$service_name == 'Alley Light Outage')
parcel.sf2$light.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(request.sf2),5)
request.sf3 <- request.sf %>% filter(request.sf$service_name == 'No Heat (Residential)' )
parcel.sf2$heat.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(request.sf3),5)
request.sf4 <- request.sf %>% filter(request.sf$service_name == 'Fire Residential or Commercial')
parcel.sf2$fire311.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(request.sf4),5)
request.sf5 <- request.sf %>% filter(request.sf$service_name == 'Infestation Residential' )
parcel.sf2$infestation.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(request.sf5),5)
request.sf6 <- request.sf %>% filter(request.sf$service_name == 'Smoke Detector' )
parcel.sf2$Detector.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(request.sf6),5)
request.sf7 <- request.sf %>% filter(request.sf$service_name == 'Building Dangerous' )
parcel.sf2$Dangerous.nn5 <-nn_function(st_coordinates(parcel.sf2),st_coordinates(request.sf7),5)
## summary all 311
request19 <- parcel.sf2 %>%
ggplot(aes(isfire, request19.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width =0.6) +
scale_fill_manual(values = palette2) +
# ylim(0,90)+
labs(x="isfire", y="Value",
title = "311 Request associations with the\n likelihood of fire (2019, nearest 5)") +
theme(legend.position = "none") +
theme(plot.title = element_text(hjust = 0.5, size = 10))+
theme(axis.title.y = element_text(color = "grey20", size = 8))
#request19
light <- parcel.sf2 %>%
ggplot(aes(isfire, light.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width =0.6) +
scale_fill_manual(values = palette2) +
labs(x="isfire", y="Value",
title = "Alley Light Outage associations with\n the likelihood of fire (2019, nearest 5)") +
theme(legend.position = "none")+
theme(plot.title = element_text(hjust = 0.5, size = 10))+
theme(axis.title.y = element_text(color = "grey20", size = 8))
heat <- parcel.sf2 %>%
ggplot(aes(isfire, heat.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width =0.6) +
scale_fill_manual(values = palette2) +
labs(x="isfire", y="Value",
title = "No Heat (Residential) associations with\n the likelihood of fire (2019, nearest 5)") +
theme(legend.position = "none")+
theme(plot.title = element_text(hjust = 0.5, size = 10))+
theme(axis.title.y = element_text(color = "grey20", size = 8))
fire311 <- parcel.sf2 %>%
ggplot(aes(isfire, fire311.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width =0.6) +
scale_fill_manual(values = palette2) +
labs(x="isfire", y="Value",
title = "Fire Residential or Commercial associations with\n the likelihood of fire (2019, nearest 5)") +
theme(legend.position = "none")+
theme(plot.title = element_text(hjust = 0.5, size = 10))+
theme(axis.title.y = element_text(color = "grey20", size = 8))
infestation <- parcel.sf2 %>%
ggplot(aes(isfire, infestation.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width =0.6) +
scale_fill_manual(values = palette2) +
labs(x="isfire", y="Value",
title = "Infestation Residential associations with\n the likelihood of fire (2019, nearest 5)") +
theme(legend.position = "none")+
theme(plot.title = element_text(hjust = 0.5, size = 10))+
theme(axis.title.y = element_text(color = "grey20", size = 8))
Detector <- parcel.sf2 %>%
ggplot(aes(isfire, Detector.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width =0.6) +
scale_fill_manual(values = palette2) +
labs(x="isfire", y="Value",
title = "Smoke Detector associations with\n the likelihood of fire (2019, nearest 5)") +
theme(legend.position = "none")+
theme(plot.title = element_text(hjust = 0.5, size = 10))+
theme(axis.title.y = element_text(color = "grey20", size = 8))
Dangerous <- parcel.sf2 %>%
ggplot(aes(isfire, Dangerous.nn5, fill=isfire)) +
geom_bar(position = "dodge", stat = "summary", fun.y = "mean", width =0.6) +
scale_fill_manual(values = palette2) +
labs(x="isfire", y="Value",
title = "Building Dangerous associations with \nthe likelihood of fire (2019, nearest 5)") +
theme(legend.position = "none")+
theme(plot.title = element_text(hjust = 0.5, size = 10))+
theme(axis.title.y = element_text(color = "grey20", size = 8))
grid.arrange(request19,light,heat,fire311,infestation,Detector,Dangerous, nrow = 4)
library(plotROC)
library(pROC)
library(pscl)
model_data <- parcel.sf2
#colSums(is.na(parcel.sf2))
model_data <- subset(model_data, select = c(lagfire20.nn5, lagfire19.nn5, lagfire18.nn5,
lagfire17.nn5, lagfire16.nn5, lagfire15.nn5,
category, interior,vio_unsafe, vio_equip,
request19.nn5, light.nn5, heat.nn5,
fire311.nn5, infestation.nn5, Detector.nn5,
Dangerous.nn5,isfire,zoning,census_tract,Total_Pop,Med_Inc,Med_Age,Percent_White
,descript))
colSums(is.na(model_data))
## lagfire20.nn5 lagfire19.nn5 lagfire18.nn5 lagfire17.nn5 lagfire16.nn5
## 0 0 0 0 0
## lagfire15.nn5 category interior vio_unsafe vio_equip
## 0 0 25848 487556 487556
## request19.nn5 light.nn5 heat.nn5 fire311.nn5 infestation.nn5
## 0 0 0 0 0
## Detector.nn5 Dangerous.nn5 isfire zoning census_tract
## 0 0 0 51 0
## Total_Pop Med_Inc Med_Age Percent_White descript
## 0 317 273 273 475726
## geometry
## 0
model_data$fire <- ifelse(model_data$isfire=='Have fire',1,0)
table(model_data$fire)
##
## 0 1
## 475726 13899
model_data$isunsafe<-ifelse(is.na(model_data$vio_unsafe),0,1)
model_data$noequip<-ifelse(is.na(model_data$vio_equip),0,1)
model_data$iscom<-ifelse(model_data$category=='Commercial',1,0)
model_data$ishotel<-ifelse(model_data$category=='Hotels and Apartments',1,0)
model_data$isRM1<-ifelse(is.na(model_data$zoning)|model_data$zoning!='RM1',0,1)
model_data$isCMX2<-ifelse(is.na(model_data$zoning)|model_data$zoning!='CMX2',0,1)
model_data$issealed<-ifelse(is.na(model_data$interior)|model_data$interior!='Sealed / Structurally Compromised',0,1)
model_data$isbelow<-ifelse(is.na(model_data$interior)|model_data$interior!='Below Average',0,1)
####Down sampling
model_data_havefire = model_data %>%
filter(fire ==1)
model_data_havefire = model_data_havefire%>%
filter(model_data_havefire$descript == 'Building Fire, No Collapse' |model_data_havefire$descript =='Building fire' )
model_data_havefire<- subset(model_data_havefire, select = c(-descript))
#table(model_data_havefire$fire)
model_data_nofire= model_data %>% filter(fire == 0)
#table(model_data_nofire$fire)
#select 108920 random rows from temp 0 (27230 *4=108920)
random_nofire <- model_data_nofire[sample(nrow(model_data_nofire), 27230), ]
random_nofire<- subset(random_nofire, select = c(-descript))
#rbind 8900 rows of 0 to the 2225 rows of 1
model_data_new <- rbind(model_data_havefire,random_nofire)
#shuffle rows - generate a random ordering
set.seed(2021) ## make reproducible here, but not if generating many random samples
rand <- sample(nrow(model_data_new))
model_data_new<-model_data_new[rand,]
model_data_new2<-model_data_new %>% st_set_geometry(NULL)
####Modeling
set.seed(3456)
trainIndex <- createDataPartition(model_data_new2$fire, p = .75,
list = FALSE,
times = 1)
Train <- model_data_new2[ trainIndex,]
Test <- model_data_new2[-trainIndex,]
#logistic regression
log_reg <- glm(fire ~ ., data=Train %>%
dplyr::select(-category, -interior, -vio_unsafe,
-vio_equip,-zoning,-isfire,-census_tract),
family="binomial" (link="logit"))
summary(log_reg)
##
## Call:
## glm(formula = fire ~ ., family = binomial(link = "logit"), data = Train %>%
## dplyr::select(-category, -interior, -vio_unsafe, -vio_equip,
## -zoning, -isfire, -census_tract))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3548 -0.6368 -0.5114 -0.2971 3.7540
##
## Coefficients: (4 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.494670765 0.169256599 -2.923 0.003471 **
## lagfire20.nn5 -0.000007180 0.000006149 -1.168 0.242932
## lagfire19.nn5 -0.000583190 0.000068926 -8.461 < 0.0000000000000002 ***
## lagfire18.nn5 -0.001008652 0.000077358 -13.039 < 0.0000000000000002 ***
## lagfire17.nn5 NA NA NA NA
## lagfire16.nn5 NA NA NA NA
## lagfire15.nn5 NA NA NA NA
## request19.nn5 0.000232879 0.000563935 0.413 0.679641
## light.nn5 0.000015375 0.000017277 0.890 0.373524
## heat.nn5 0.000106508 0.000032635 3.264 0.001100 **
## fire311.nn5 -0.000146578 0.000036550 -4.010 0.0000606302210 ***
## infestation.nn5 0.000016916 0.000024944 0.678 0.497659
## Detector.nn5 0.000086793 0.000043630 1.989 0.046667 *
## Dangerous.nn5 0.000151550 0.000042382 3.576 0.000349 ***
## Total_Pop 0.000022117 0.000012084 1.830 0.067211 .
## Med_Inc -0.000007316 0.000001544 -4.739 0.0000021429841 ***
## Med_Age -0.001001476 0.004008244 -0.250 0.802700
## Percent_White -0.556451180 0.106896211 -5.206 0.0000001934462 ***
## isunsafe 2.352188931 0.145348821 16.183 < 0.0000000000000002 ***
## noequip NA NA NA NA
## iscom 0.871883905 0.133509533 6.530 0.0000000000656 ***
## ishotel 0.583712862 0.060911728 9.583 < 0.0000000000000002 ***
## isRM1 -0.018165175 0.045788636 -0.397 0.691575
## isCMX2 0.262196209 0.090736453 2.890 0.003857 **
## issealed 1.241633263 0.105719328 11.745 < 0.0000000000000002 ***
## isbelow 0.449683493 0.085258135 5.274 0.0000001332083 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 21300 on 24246 degrees of freedom
## Residual deviance: 19453 on 24225 degrees of freedom
## (25 observations deleted due to missingness)
## AIC: 19497
##
## Number of Fisher Scoring iterations: 5
pR2(log_reg)
## llh llhNull G2 McFadden r2ML
## -9726.55516925 -10667.48760456 1881.86487063 0.08820563 0.07467687
## r2CU
## 0.12761432
testProbs <- data.frame(Outcome = as.factor(Test$fire),
Probs = predict(log_reg, Test, type= "response"))
# head(testProbs)
ggplot(testProbs, aes(x = Probs, fill = as.factor(Outcome))) +
geom_density() +
facet_grid(Outcome ~ .) +
scale_fill_manual(values = palette3) +
labs(x = "Fire", y = "Density of probabilities",
title = "Distribution of predicted probabilities by observed outcome") +
theme(strip.text.x = element_text(size = 18),
legend.position = "none")
testProbs <-
testProbs %>%
mutate(predOutcome = as.factor(ifelse(testProbs$Probs > 0.5 , 1, 0)))
# testProbs
The Model is better at predicting those parcels that don’t have fire occurance
#(1)Accuracy: the rate of true positives plus true negatives divided by the total number of observations.
#(2)Sensitivity(true positive rate): the proportion of actual positives (1’s) that were predicted to be positive
#(3)Specificity(true negative rate): the proportion of actual negatives (0’s) that were predicted to be negatives
caret::confusionMatrix(testProbs$predOutcome, testProbs$Outcome,
positive = "1")
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 6776 1173
## 1 56 76
##
## Accuracy : 0.8479
## 95% CI : (0.8399, 0.8557)
## No Information Rate : 0.8454
## P-Value [Acc > NIR] : 0.275
##
## Kappa : 0.083
##
## Mcnemar's Test P-Value : <0.0000000000000002
##
## Sensitivity : 0.060849
## Specificity : 0.991803
## Pos Pred Value : 0.575758
## Neg Pred Value : 0.852434
## Prevalence : 0.154560
## Detection Rate : 0.009405
## Detection Prevalence : 0.016335
## Balanced Accuracy : 0.526326
##
## 'Positive' Class : 1
##
ggplot(testProbs, aes(d = as.numeric(testProbs$Outcome), m = Probs)) +
geom_roc(n.cuts = 50, labels = FALSE, colour = "#FE9900") +
style_roc(theme = theme_grey) +
geom_abline(slope = 1, intercept = 0, size = 1.5, color = 'grey') +
labs(title = "ROC Curve - logistic regression model")
library(tidyverse)
library(httr)
base_url <- "http://api.phila.gov/ais_doc/v1/"
endpoint <- "search/"
address <- "1234%20market%20st"
key <- "?gatekeeperKey=6ba4de64d6ca99aa4db3b9194e37adbf"
url <- paste(base_url, endpoint, address, key, sep="")
# response <- httr::GET("http://api.phila.gov/ais_doc/v1/search/1234%20market%20st?gatekeeperKey=6ba4de64d6ca99aa4db3b9194e37adbf")
response <- httr::GET(url)
tidy_res <- httr::content(response, simplifyVector=TRUE)
glimpse(tidy_res$features)
glimpse(tidy_res$features$properties)
glimpse(tidy_res$features$geometry)